In [3]:
import pandas as pd

Show me the first lines of the original file


In [14]:
df = pd.read_excel('/tmp/gonzalo_test/aseg.xls')
df.head()


Out[14]:
subject Index SegId NVoxels Volume_mm3 StructName normMean normStdDev normMin normMax normRang
0 55297 1 4 24645 24645.4 Left-Lateral-Ventricle 15.8438 11.5978 0.0 95.0 95.0
1 55297 2 5 408 408.4 Left-Inf-Lat-Vent 45.1204 12.7040 9.0 79.0 70.0
2 55297 3 7 15096 15095.8 Left-Cerebellum-White-Matter 88.0008 6.0972 18.0 105.0 87.0
3 55297 4 8 53369 53368.9 Left-Cerebellum-Cortex 64.8514 9.0857 13.0 165.0 152.0
4 55297 5 10 7452 7451.7 Left-Thalamus-Proper 88.8853 8.9959 49.0 109.0 60.0

Show me the region names containing 'Vent' or 'WM' or 'Hippo'


In [26]:
names = set([each for each in df['StructName'].tolist() \
             if 'WM' in each 
             or 'Vent' in each 
             or 'Hippo' in each])
names


Out[26]:
{u'3rd-Ventricle',
 u'4th-Ventricle',
 u'5th-Ventricle',
 u'Left-Hippocampus',
 u'Left-Inf-Lat-Vent',
 u'Left-Lateral-Ventricle',
 u'Left-VentralDC',
 u'Left-WM-hypointensities',
 u'Left-non-WM-hypointensities',
 u'Right-Hippocampus',
 u'Right-Inf-Lat-Vent',
 u'Right-Lateral-Ventricle',
 u'Right-VentralDC',
 u'Right-WM-hypointensities',
 u'Right-non-WM-hypointensities',
 u'WM-hypointensities',
 u'non-WM-hypointensities'}

Reshape the table and show me the first lines


In [27]:
df = pd.DataFrame(df[df['StructName'].isin(names)], columns=['subject', 'StructName', 'Volume_mm3'])
df = df.pivot(index='subject', columns='StructName', values='Volume_mm3')
df.head()


Out[27]:
StructName 3rd-Ventricle 4th-Ventricle 5th-Ventricle Left-Hippocampus Left-Inf-Lat-Vent Left-Lateral-Ventricle Left-VentralDC Left-WM-hypointensities Left-non-WM-hypointensities Right-Hippocampus Right-Inf-Lat-Vent Right-Lateral-Ventricle Right-VentralDC Right-WM-hypointensities Right-non-WM-hypointensities WM-hypointensities non-WM-hypointensities
subject
10010 977.3 1073.1 0.0 5004.7 125.4 7938.0 4161.4 0.0 0.0 5197.9 165.9 7546.8 3766.3 0.0 0.0 1077.5 16.0
10013 1664.2 1421.9 0.0 4010.4 400.5 13127.5 3373.0 0.0 0.0 3857.0 268.1 11640.2 3371.3 0.0 0.0 2248.2 32.9
10015 1169.6 1034.0 0.0 4656.0 280.9 7856.5 3953.5 0.0 0.0 4771.1 323.1 9392.0 3846.2 0.0 0.0 1241.3 15.8
10016 554.9 998.7 12.9 4788.4 275.8 3982.5 3948.4 0.0 0.0 5122.7 108.9 3088.8 3807.7 0.0 0.0 712.8 14.1
10019 1990.1 2295.1 0.0 4984.5 183.9 9272.8 4051.0 0.0 0.0 4948.4 127.1 9330.8 3796.3 0.0 0.0 3267.2 32.1

Save it and success !


In [28]:
df.to_excel('/tmp/gonzalo_test/aseg_pivot.xls')

In [25]:
from IPython.display import Image
Image(url='http://s2.quickmeme.com/img/c3/c37a6cc5f88867e5387b8787aaf67afc350b3f37f357ed0a3088241488063bce.jpg')


Out[25]:

In [ ]: